{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "regular-mapping",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-06-29T07:07:43.010171Z",
     "iopub.status.busy": "2021-06-29T07:07:43.009362Z",
     "iopub.status.idle": "2021-06-29T07:07:44.153574Z",
     "shell.execute_reply": "2021-06-29T07:07:44.152839Z",
     "shell.execute_reply.started": "2021-06-29T06:44:08.483510Z"
    },
    "papermill": {
     "duration": 1.167667,
     "end_time": "2021-06-29T07:07:44.153762",
     "exception": false,
     "start_time": "2021-06-29T07:07:42.986095",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "import numpy as np \n",
    "import pandas as pd\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.model_selection import cross_val_predict\n",
    "from sklearn.metrics import roc_auc_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "residential-google",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-06-29T07:07:44.183457Z",
     "iopub.status.busy": "2021-06-29T07:07:44.182875Z",
     "iopub.status.idle": "2021-06-29T07:07:46.190466Z",
     "shell.execute_reply": "2021-06-29T07:07:46.189385Z",
     "shell.execute_reply.started": "2021-06-29T06:44:08.493050Z"
    },
    "papermill": {
     "duration": 2.022931,
     "end_time": "2021-06-29T07:07:46.190647",
     "exception": false,
     "start_time": "2021-06-29T07:07:44.167716",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "train = pd.read_csv(\"../input/tabular-playground-series-jan-2021/train.csv\")\n",
    "test = pd.read_csv(\"../input/tabular-playground-series-jan-2021/test.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "excess-terminal",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-06-29T07:07:46.213055Z",
     "iopub.status.busy": "2021-06-29T07:07:46.212397Z",
     "iopub.status.idle": "2021-06-29T07:07:46.398777Z",
     "shell.execute_reply": "2021-06-29T07:07:46.398254Z",
     "shell.execute_reply.started": "2021-06-29T06:44:10.056819Z"
    },
    "papermill": {
     "duration": 0.199347,
     "end_time": "2021-06-29T07:07:46.398917",
     "exception": false,
     "start_time": "2021-06-29T07:07:46.199570",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "train = train.fillna(-1).drop([\"id\", \"target\"], axis=1)\n",
    "test = test.fillna(-1).drop([\"id\"], axis=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "accomplished-values",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-06-29T07:07:46.422088Z",
     "iopub.status.busy": "2021-06-29T07:07:46.421023Z",
     "iopub.status.idle": "2021-06-29T07:07:46.491176Z",
     "shell.execute_reply": "2021-06-29T07:07:46.490650Z",
     "shell.execute_reply.started": "2021-06-29T06:44:10.270005Z"
    },
    "papermill": {
     "duration": 0.083484,
     "end_time": "2021-06-29T07:07:46.491305",
     "exception": false,
     "start_time": "2021-06-29T07:07:46.407821",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "X = train.append(test)\n",
    "y = [0] * len(train) + [1] * len(test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "female-robert",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-06-29T07:07:46.628342Z",
     "iopub.status.busy": "2021-06-29T07:07:46.627720Z",
     "iopub.status.idle": "2021-06-29T07:14:45.773182Z",
     "shell.execute_reply": "2021-06-29T07:14:45.773908Z",
     "shell.execute_reply.started": "2021-06-29T06:44:10.339273Z"
    },
    "papermill": {
     "duration": 419.274106,
     "end_time": "2021-06-29T07:14:45.774184",
     "exception": false,
     "start_time": "2021-06-29T07:07:46.500078",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": [
    "model = RandomForestClassifier()\n",
    "cv_preds = cross_val_predict(model, X, y, cv=5, n_jobs=-1, method='predict_proba')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "compact-stockholm",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-06-29T07:14:45.800851Z",
     "iopub.status.busy": "2021-06-29T07:14:45.799879Z",
     "iopub.status.idle": "2021-06-29T07:14:46.066310Z",
     "shell.execute_reply": "2021-06-29T07:14:46.066816Z",
     "shell.execute_reply.started": "2021-06-29T06:51:10.939785Z"
    },
    "papermill": {
     "duration": 0.27967,
     "end_time": "2021-06-29T07:14:46.066995",
     "exception": false,
     "start_time": "2021-06-29T07:14:45.787325",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.4992435648000001\n"
     ]
    }
   ],
   "source": [
    "print(roc_auc_score(y_true=y, y_score=cv_preds[:,1]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "afraid-electric",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-06-29T07:14:46.089029Z",
     "iopub.status.busy": "2021-06-29T07:14:46.088085Z",
     "iopub.status.idle": "2021-06-29T07:14:46.094127Z",
     "shell.execute_reply": "2021-06-29T07:14:46.094608Z",
     "shell.execute_reply.started": "2021-06-29T06:59:10.825930Z"
    },
    "papermill": {
     "duration": 0.018477,
     "end_time": "2021-06-29T07:14:46.094768",
     "exception": false,
     "start_time": "2021-06-29T07:14:46.076291",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1408\n"
     ]
    }
   ],
   "source": [
    "print(np.sum(cv_preds[:len(X), 1] > 0.5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "respected-petite",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-06-29T07:14:46.117185Z",
     "iopub.status.busy": "2021-06-29T07:14:46.116269Z",
     "iopub.status.idle": "2021-06-29T07:18:06.667034Z",
     "shell.execute_reply": "2021-06-29T07:18:06.667561Z",
     "shell.execute_reply.started": "2021-06-29T06:51:11.198693Z"
    },
    "papermill": {
     "duration": 200.563466,
     "end_time": "2021-06-29T07:18:06.667769",
     "exception": false,
     "start_time": "2021-06-29T07:14:46.104303",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RandomForestClassifier()"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "model.fit(X, y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "boring-patio",
   "metadata": {
    "execution": {
     "iopub.execute_input": "2021-06-29T07:18:06.695391Z",
     "iopub.status.busy": "2021-06-29T07:18:06.694318Z",
     "iopub.status.idle": "2021-06-29T07:18:06.891887Z",
     "shell.execute_reply": "2021-06-29T07:18:06.892746Z",
     "shell.execute_reply.started": "2021-06-29T06:58:22.260736Z"
    },
    "papermill": {
     "duration": 0.215142,
     "end_time": "2021-06-29T07:18:06.893042",
     "exception": false,
     "start_time": "2021-06-29T07:18:06.677900",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "feature_19 : 0.0291\n",
      "feature_54 : 0.0282\n",
      "feature_43 : 0.0249\n",
      "feature_39 : 0.0220\n",
      "feature_12 : 0.0218\n",
      "feature_20 : 0.0201\n",
      "feature_50 : 0.0196\n",
      "feature_33 : 0.0186\n",
      "feature_69 : 0.0184\n",
      "feature_56 : 0.0184\n",
      "feature_25 : 0.0180\n",
      "feature_10 : 0.0175\n",
      "feature_18 : 0.0172\n",
      "feature_31 : 0.0165\n",
      "feature_8  : 0.0163\n",
      "feature_62 : 0.0161\n",
      "feature_37 : 0.0152\n",
      "feature_5  : 0.0150\n",
      "feature_14 : 0.0150\n",
      "feature_51 : 0.0147\n",
      "feature_65 : 0.0147\n",
      "feature_3  : 0.0142\n",
      "feature_2  : 0.0142\n",
      "feature_26 : 0.0141\n",
      "feature_53 : 0.0140\n",
      "feature_35 : 0.0140\n",
      "feature_16 : 0.0138\n",
      "feature_48 : 0.0136\n",
      "feature_9  : 0.0135\n",
      "feature_60 : 0.0135\n",
      "feature_29 : 0.0134\n",
      "feature_71 : 0.0132\n",
      "feature_11 : 0.0131\n",
      "feature_28 : 0.0130\n",
      "feature_72 : 0.0130\n",
      "feature_21 : 0.0125\n",
      "feature_73 : 0.0124\n",
      "feature_59 : 0.0122\n",
      "feature_58 : 0.0121\n",
      "feature_61 : 0.0120\n",
      "feature_40 : 0.0118\n",
      "feature_38 : 0.0114\n",
      "feature_55 : 0.0114\n",
      "feature_46 : 0.0113\n",
      "feature_67 : 0.0112\n",
      "feature_22 : 0.0112\n",
      "feature_6  : 0.0112\n",
      "feature_44 : 0.0112\n",
      "feature_13 : 0.0111\n",
      "feature_0  : 0.0110\n",
      "feature_27 : 0.0110\n",
      "feature_68 : 0.0109\n",
      "feature_30 : 0.0109\n",
      "feature_36 : 0.0108\n",
      "feature_70 : 0.0107\n",
      "feature_1  : 0.0107\n",
      "feature_34 : 0.0105\n",
      "feature_42 : 0.0103\n",
      "feature_41 : 0.0102\n",
      "feature_23 : 0.0102\n",
      "feature_45 : 0.0097\n",
      "feature_57 : 0.0095\n",
      "feature_32 : 0.0095\n",
      "feature_64 : 0.0095\n",
      "feature_52 : 0.0094\n",
      "feature_7  : 0.0092\n",
      "feature_63 : 0.0091\n",
      "feature_17 : 0.0091\n",
      "feature_4  : 0.0089\n",
      "feature_24 : 0.0088\n",
      "feature_66 : 0.0083\n",
      "feature_15 : 0.0079\n",
      "feature_47 : 0.0079\n",
      "feature_49 : 0.0077\n",
      "feature_74 : 0.0073\n"
     ]
    }
   ],
   "source": [
    "ranks = sorted(list(zip(X.columns, model.feature_importances_)), \n",
    "               key=lambda x: x[1], reverse=True)\n",
    "\n",
    "for feature, score in ranks:\n",
    "    print(f\"{feature:10} : {score:0.4f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "demographic-washer",
   "metadata": {
    "papermill": {
     "duration": 0.014565,
     "end_time": "2021-06-29T07:18:06.924933",
     "exception": false,
     "start_time": "2021-06-29T07:18:06.910368",
     "status": "completed"
    },
    "tags": []
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.10"
  },
  "papermill": {
   "default_parameters": {},
   "duration": 632.894691,
   "end_time": "2021-06-29T07:18:08.241386",
   "environment_variables": {},
   "exception": null,
   "input_path": "__notebook__.ipynb",
   "output_path": "__notebook__.ipynb",
   "parameters": {},
   "start_time": "2021-06-29T07:07:35.346695",
   "version": "2.3.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
